Building Maps

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(mapdata)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(viridis)
## Loading required package: viridisLite
library(wesanderson)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(RColorBrewer)
library(ggplot2)

Exercise 1

#For the above graph “COVID-19 Deaths”, summarize the counts for each Country on the graph and update the graph to 9/26/2020. You are doing some real-life data wrangling. Data is not always in the form that you expected, so it is important to check what the results of each step are. You can summarize the counts for each country and find the median Lat and Long as a way of summarizing the Lat and Long from each state. However, the US and several other countries do not have counts. This is because for some US rows (and rows of other countries) the Lat and Long are NA. One strategy is to simply remove this data (which is fine for this class).

# Download the 09-26-2020 daily report CSV and rename the `Long_` column
# to `Long` so the plotting code can refer to it by that name.
daily_report_1 <-
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv") %>%
  read_csv() %>%
  rename(Long = Long_)
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
# World map of confirmed cases: one point per report row, sized by the
# number of confirmed cases in thousands. Rows whose Lat/Long are missing
# are dropped by geom_point() with a warning.
ggplot(daily_report_1, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("world", colour = "pink", fill = "grey") +
  theme_minimal() +
  geom_point(shape = 21, color = "gold", fill = "magenta", alpha = 0.5) +
  labs(
    title = "World COVID-19 Confirmed cases",
    x = "",
    y = "",
    # Legend title: the stray extra ")" of the original label is removed.
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
## Warning: Removed 81 rows containing missing values (geom_point).

Exercise 2

#Update Anisa Dhana’s graph layout of the US to 9/26/2020. You may need to adjust the range or change to a linear scale (delete trans=“log”)

# US rows of the 09-26-2020 daily report, without the listed
# Province_State entries and restricted to rows with a positive latitude.
daily_report_2 <-
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv") %>%
  read_csv() %>%
  rename(Long = Long_) %>%
  filter(
    Country_Region == "US",
    !(Province_State %in% c(
      "Alaska", "Hawaii", "American Samoa",
      "Puerto Rico", "Northern Mariana Islands",
      "Virgin Islands", "Recovered", "Guam", "Grand Princess",
      "District of Columbia", "Diamond Princess"
    )),
    Lat > 0
  )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
### Changed the size range from c(1,7) to c(1,5) for better visualization

# Legend breaks and their labels, shared by the size and colour scales so
# both legends stay in sync.
mybreaks <- c(100, 1000, 10000, 100000, 1000000)
mylabels <- c(
  "100-999", "1,000-9,999", "10,000-99,999",
  "100,000-999,999", "1,000,000-9,999,999"
)

# Map of the report rows in daily_report_2: point size and colour both
# encode the number of confirmed cases on a log scale.
ggplot(daily_report_2, aes(x = Long, y = Lat, size = Confirmed)) +
  borders("state", colour = "white", fill = "grey90") +
  # x, y and size are inherited from ggplot(); only colour is added here.
  # `FALSE` is spelled out instead of the reassignable shorthand `F`.
  geom_point(aes(color = Confirmed), stroke = FALSE, alpha = 0.7) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 5),
    breaks = mybreaks, labels = mylabels
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases",
    trans = "log", breaks = mybreaks, labels = mylabels
  ) +
  # Clean up the graph and update the title to the report date (9/26/20)
  theme_void() +
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US (9/26/20)") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 6 rows containing missing values (geom_point).

Exercise 3

#Update the above graph “Number of Confirmed Cases by US County” to 9/26/2020 and use a different color scheme or theme

# Get and format the covid report data

# Read the 09-26-2020 daily report, combine Admin2 and Province_State into
# a single "Admin2.Province_State" key, total the confirmed cases per key
# and lower-case the key.
report_09_26_2020 <-
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv") %>%
  read_csv() %>%
  rename(Long = Long_) %>%
  unite(col = Key, Admin2, Province_State, sep = ".") %>%
  group_by(Key) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(Key = tolower(Key))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# dim(report_09_26_2020)
# get and format the map data
# State-level map data, used as the base data of the county plot.
us <- map_data("state")

# County-level map data with a "subregion.region" key added; the original
# subregion and region columns are kept (remove = FALSE).
counties <- unite(
  map_data("county"),
  Key, subregion, region,
  sep = ".", remove = FALSE
)

# Attach the confirmed counts to the county map rows by Key
state_join <- counties %>%
  left_join(report_09_26_2020, by = "Key")
# sum(is.na(state_join$Confirmed))
# County-level map of confirmed cases with a log10 fill scale; polygons
# whose Confirmed value is NA are filled white.
ggplot(us, aes(x = long, y = lat, group = group)) +
  # State outlines are added first, then the filled county polygons
  borders("state", colour = "black") +
  geom_polygon(aes(fill = Confirmed), data = state_join) +
  scale_fill_gradientn(
    colours = brewer.pal(n = 5, name = "Pastel1"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    trans = "log10",
    na.value = "White"
  ) +
  coord_fixed(1.3) +
  labs(title = "Number of Confirmed Cases by US County") +
  theme_minimal()
## Warning: Transformation introduced infinite values in discrete y-axis

Exercise 4

#Make an interactive plot using a state of your choosing, using a theme different from the ones used in the above examples.

#Change Massachusetts to New York
# Confirmed cases per Admin2 entry for New York from the 09-26-2020 daily
# report; Admin2 is lower-cased so it can be matched against map data.
daily_report_4 <-
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv") %>%
  read_csv() %>%
  rename(Long = Long_) %>%
  filter(Province_State == "New York") %>%
  group_by(Admin2) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(Admin2 = tolower(Admin2))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# State- and county-level map data, each restricted to New York.
US4 <- map_data("state")
counties <- map_data("county")
ny_us <- US4[which(US4$region == "new york"), ]
ny_county <- counties[which(counties$region == "new york"), ]

# Add the confirmed counts: map `subregion` is matched to report `Admin2`.
state_join_ex4 <- ny_county %>%
  left_join(daily_report_4, by = c(subregion = "Admin2"))

# plot state map

# Interactive New York map: build the static ggplot of confirmed cases and
# pass it to ggplotly().
(
  ggplot(ny_county, aes(x = long, y = lat, group = group)) +
    # Data layer: polygons filled by confirmed cases
    geom_polygon(aes(fill = Confirmed), data = state_join_ex4, color = "white") +
    scale_fill_viridis(option = "plasma") +
    coord_fixed(1.3) +
    labs(title = "COVID-19 Cases in NY (9/26/20)") +
    theme(
      axis.line = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank()
    )
) %>%
  ggplotly()
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Exercise 5

#Create a report with static maps and interactive graphs that is meant to be read by others (e.g. your friends and family). Hide warnings, messages and even the code you used so that it is readable. Include references. Link to the Lab 6 report from your Github site. Submit the link to Moodle.

Application written in R (R Core Team 2015) using the Shiny framework (Chang et al. 2015). Data acquired from the CSSEGISandData/COVID-19 GitHub repository (https://github.com/CSSEGISandData/COVID-19).

REFERENCES

Chang, W., J. Cheng, JJ. Allaire, Y. Xie, and J. McPherson. 2015. “Shiny: Web Application Framework for R. R Package Version 0.12.1.” Computer Program. http://CRAN.R-project.org/package=shiny.

R Core Team. 2015. “R: A Language and Environment for Statistical Computing.” Journal Article. http://www.R-project.org.